Street trees and incomes in New York City

Author

Josephine Cardelle

# Load necessary libraries
library(tidycensus)
Warning: package 'tidycensus' was built under R version 4.4.1
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
here() starts at /Users/jocardelle/MEDS/Fall_24/EDS-222-statistics/final-project/nyc-trees
library(stars)
Loading required package: abind
Warning: package 'abind' was built under R version 4.4.1
Loading required package: sf
Warning: package 'sf' was built under R version 4.4.1
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tidyr)
library(dplyr)
library(units)
udunits database from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/units/share/udunits/udunits2.xml
library(mapview)
# Hypothesis graph: an illustrative straight line (slope 1/3) sketching the
# expected positive relationship between tract income and tree density.
# Tick labels are blanked below, so only the shape of the line is shown.
df <- data.frame(x = 1:100)
df$y <- 1/3 * df$x

# Both axes are limited to 0-50 below, so only rows with x <= 50 can be drawn.
# Subsetting up front stops ggplot from discarding the other 50 rows with a
# "Removed 50 rows containing missing values" warning; `df` itself is unchanged.
df %>%
  filter(x <= 50) %>%
  ggplot(aes(x, y)) +
  geom_line() +
  xlim(0, 50) +
  ylim(0, 50) +
  labs(title = "Trees and Income in NYC Hypothesis",
       x = "median income($)",
       y = expression(paste("tree per ", km^2))) +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank())
Warning: Removed 50 rows containing missing values or values outside the scale range
(`geom_line()`).

# Cache the downloaded tract shapefiles so that repeated renders reuse the
# local copy instead of fetching the geometry from the Census website each time
options(tigris_use_cache = TRUE)

# Read in income for New York City census tracts: ACS variable B19013_001
# (mapped below as median income) for the five NYC counties, from the
# 2011-2015 5-year ACS, with tract geometry attached
nyc <- get_acs(
  state = "NY",
  county = c("Bronx", "Kings", "New York", "Richmond", "Queens"),
  geography = "tract",
  variables = "B19013_001",
  geometry = TRUE,
  year = 2015
)
Getting data from the 2011-2015 5-year ACS
Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |======================================================================| 100%
# Load the 2015 street tree census CSV from the project's data directory
nyc_trees <- here("data/2015StreetTreesCensus_TREES.csv") %>%
  read_csv()
Rows: 683788 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (28): created_at, the_geom, curb_loc, status, health, spc_latin, spc_com...
dbl (14): tree_id, block_id, tree_dbh, stump_diam, cb_num, borocode, cncldis...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List the distinct status values recorded for the trees
nyc_trees %>%
  pull(status) %>%
  unique()
[1] "Alive" "Dead"  "Stump"
# Keep only the trees recorded as alive (dead trees and stumps are excluded)
nyc_trees_alive <- filter(nyc_trees, status == "Alive")
# Store each census tract's area as a plain number in a new `area_km2` column.
# The st_area() result is divided by 1e6, i.e. it is assumed to be in square
# metres, and the units class is stripped with as.numeric().
nyc <- mutate(
  nyc,
  area_km2 = as.numeric(st_area(geometry)) / 1e6
)

# Interactive map of the tracts, coloured by the ACS income estimate
nyc %>%
  mapview(zcol = "estimate", layer.name = "Median income ($)")
# Turn the living-tree table into an sf point layer built from its
# longitude/Latitude columns, labelled with the same CRS as the tract layer.
# NOTE(review): this assigns the tract CRS without reprojecting, so it assumes
# the tree coordinates are already expressed in that CRS - confirm.
nyc_trees_sf <- nyc_trees_alive %>%
  st_as_sf(
    coords = c("longitude", "Latitude"),
    crs = st_crs(nyc)
  )

# Attach to every tree the census tract it lies within (st_within), then
# count the trees per tract identifier (GEOID)
nyc_trees_income <- st_join(nyc_trees_sf, nyc, join = st_within) %>%
  group_by(GEOID) %>%
  summarise(tree_count = n())

# Add tree count data back to income data.
# The counts are already keyed by tract GEOID, so they are matched on that key
# rather than through a second spatial join: every tract appears exactly once
# and receives only the count computed for it. Tracts without a matching count
# keep tree_count = NA. The tract polygons from `nyc` are kept as the geometry;
# the tree geometry is dropped before joining.
# GEOID is removed afterwards so the result has the same columns as before.
treecount_income <- nyc %>%
  left_join(st_drop_geometry(nyc_trees_income), by = "GEOID") %>%
  select(-GEOID)

# Tree density: trees counted in the tract divided by the tract's area in km2
treecount_income <- mutate(
  treecount_income,
  tree_per_km2 = tree_count / area_km2
)

# Interactive map of the tracts, coloured by tree density
treecount_income %>%
  mapview(zcol = "tree_per_km2", layer.name = "trees per square kilometer")
# Scatter plot of tree density against tract income with a fitted linear trend
treecount_income %>%
  ggplot(mapping = aes(x = estimate, y = tree_per_km2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Trees and Income in NYC",
    x = "median income($)",
    y = expression(paste("tree per ", km^2))
  )
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 67 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 67 rows containing missing values or values outside the scale range
(`geom_point()`).

# Fit tree density on tract income by ordinary least squares and print the
# coefficient table and fit statistics
summary(
  lm(formula = tree_per_km2 ~ estimate, data = treecount_income)
)

Call:
lm(formula = tree_per_km2 ~ estimate, data = treecount_income)

Residuals:
     Min       1Q   Median       3Q      Max 
-1187.90  -307.49     8.64   315.41  1615.92 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.066e+03  2.358e+01  45.199  < 2e-16 ***
estimate    1.344e-03  3.566e-04   3.769 0.000169 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 479.6 on 2098 degrees of freedom
  (67 observations deleted due to missingness)
Multiple R-squared:  0.006725,  Adjusted R-squared:  0.006252 
F-statistic:  14.2 on 1 and 2098 DF,  p-value: 0.0001685
# Keep the fitted linear model (tree density ~ income) for later use
lm_trees <- lm(
  formula = tree_per_km2 ~ estimate,
  data = treecount_income
)

Residuals